# USE R/4.4.3 FOR REPLICATION
rm(list = ls())


### Specify working directory path here 
### Make sure working directory path points to the replication folder
path = ""
setwd(path)


# Load groundhog 
install.packages("groundhog") # Install groundhog
library(groundhog)

# You can install packages with groundhog or manually
groundhog.library(c("dplyr", "haven", "foreign", "lmtest", "sandwich",
                    "stargazer", "sjmisc", "ggplot2", "reshape2",
                    "xtable", "tidyr", "estimatr", "ineq", "multcomp",
                    "broom", "texreg", "Matching", "ggpubr", "sf",
                    "stringr", "lfe", "lme4", "RColorBrewer", "sensemakr"), 
                  date = "2025-03-20")


# Install packages manually, if groundhog does not work:
# install.packages("devtools")
# library(devtools)
# install_version("dplyr", version = "1.1.4")
# install_version("haven", version = "2.5.4")
# install_version("foreign", version = "0.8-88")
# install_version("lmtest", version = "0.9-40")
# install_version("sandwich", version = "3.1-1")
# install_version("stargazer", version = "5.2.3")
# install_version("sjmisc", version = "2.8.10")
# install_version("ggplot2", version = "3.5.1")
# install_version("reshape2", version = "1.4.4")
# install_version("xtable", version = "1.8-4")
# install_version("tidyr", version = "1.3.1")
# install_version("estimatr", version = "1.0.6")
# install_version("ineq", version = "0.2-13")
# install_version("multcomp", version = "1.4-28")
# install_version("broom", version = "1.0.7")
# install_version("texreg", version = "1.39.4")
# install_version("Matching", version = "4.10-15")
# install_version("ggpubr", version = "0.6.0")
# install_version("sf", version = "1.0-19")
# install_version("stringr", version = "1.5.1")
# install_version("lfe", version = "3.1.1")
# install_version("lme4", version = "1.1-36")
# install_version("RColorBrewer", version = "1.1-3")


# Attach libraries manually if groundhog does not work
# library(dplyr)
# library(haven)
# library(foreign)
# library(lmtest)
# library(sandwich)
# library(stargazer)
# library(sjmisc)
# library(ggplot2)
# library(reshape2)
# library(xtable)
# library(tidyr)
# library(estimatr)
# library(ineq)
# library(multcomp)
# library(broom)
# library(texreg)
# library(Matching)
# library(ggpubr)
# library(sf)
# library(stringr)
# library(lfe)
# library(lme4)
# library(RColorBrewer)
# library(sensemakr)





### Functions for the analysis ###
pct_maker = function(x, y){
  x_pct = ifelse(x==0 | y==0, NA, x/y)
  return(x_pct)
}

count_nonna = function(x){
  count = sum(!is.na(x))
  return(count)
} 

mean_nona = function(x){
  count = mean(x, na.rm = T)
  return(count)
} 

sd_nona = function(x){
  count = sd(x, na.rm = T)
  return(count)
} 

destring = function(x){
  destringed = x %>% as.character() %>% as.numeric()
  return(destringed)
}

# Plot function
interact_plot_fun = function(model, cluster, terms, labs, name){
  
  pl1 = sjPlot::plot_model(model, title = "", 
                           type = "eff", terms = terms, se = FALSE)
  
  eff.plot = pl1 + labs(x = labs[["x"]], y = labs[["y"]]) + 
    scale_color_manual(labels = c("Non-reserved", "Reserved"), 
                       name = name, values = c("#FDE725FF", "#481567FF"))+
    scale_fill_manual(labels = c("Non-reserved", "Reserved"), 
                      name = name, values = c("#FDE725FF", "#481567FF"))
  return(eff.plot)
  
}

cl  <- function(fm, cluster) {
  library(sandwich)
  M <- length(unique(cluster))   
  N <- length(cluster)              
  K <- fm$rank                   
  dfc <- (M/(M-1))*((N-1)/(N-K-1))
  uj  <- apply(estfun(fm), 2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc * sandwich(fm, meat = crossprod(uj)/N)
  return(vcovCL)
}

predict.rob <- function(x,clcov,newdata){
  if(missing(newdata)){ newdata <- x$model }
  tt <- terms(x)
  Terms <- delete.response(tt)
  m.mat <- model.matrix(Terms,data=newdata)
  out <- which(!(colnames(m.mat) %in% colnames(clcov)))
  m.mat <- m.mat[,-c(out)]
  fit <- as.vector(m.mat %*% x$coefficients[!is.na(x$coefficients)])
  se.fit <- sqrt(diag(m.mat%*%clcov%*%t(m.mat)))
  return(list(fit=fit,se.fit=se.fit))
}

interact_plot_fun2 = function(model, data, cluster, res2, w.res,
                              labs, name, interactor){
  cluster = enquo(cluster)
  w.res = enquo(w.res)
  res2 = enquo(res2)
  interactor = enquo(interactor)
  
  newdat = model$model %>%
    mutate(rown = rownames(.)) %>%
    left_join(data %>% ungroup %>%
                mutate(rown = rownames(.)) %>%
                dplyr::select(rown, !!cluster)) 
  clvcov = vcovCL(model, cluster = data %>% ungroup %>% dplyr::select(!!cluster))
  pred = predict.rob(x = model, clcov = clvcov, newdata = model$model)
  
  newdat = bind_cols(newdat, 
                     fit = pred$fit, se = pred$se.fit)
  
  plot = ggplot(newdat, aes(x = !!interactor, y = fit), group = !!w.res)+
    geom_point(aes(color = !!w.res))+
    geom_smooth(method = "lm", aes(color = !!w.res), 
                se =  TRUE, method.args = list(cluster = newdat %>% dplyr::select(!!cluster)), 
                fullrange = T)+
    geom_rug(sides = "b")+
    labs(x = labs[["x"]], y = labs[["y"]]) + 
    scale_color_manual(labels = c("Non-reserved", "Reserved"), 
                       name = name, values = c("#FDE725FF", "#481567FF"))+
    scale_fill_manual(labels = c("Non-reserved", "Reserved"), 
                      name = name, values = c("#FDE725FF", "#481567FF"))+
    facet_grid(cols = vars(!!res2))
  return(plot)
  
}





summary.fun = function(data, varnames){
  summary = data %>% ungroup() %>%
    summarize_at(vars(varnames), .funs = list(mean_nona, sd_nona, count_nonna)) %>%
    reshape2::melt() %>%
    mutate(var = sapply(str_split(variable, pattern = "_fn"), FUN = function(x){x[[1]]}),
           fun = sapply(str_split(variable, pattern = "_fn"), FUN = function(x){x[[2]]}),
           fun2 = case_when(
             fun=="1" ~ "mean",
             fun=="2" ~ "sd",
             fun=="3" ~ "count"
           )) %>%
    pivot_wider(id_cols = c("var"), names_from = fun2, values_from = value) %>%
    mutate(se = sd/sqrt(count))
  return(summary)
}


bal_tab <- function(data, group.var, balancevar, ...){
  
  group = enquo(group.var)
  balancevar = enquo(balancevar)
  balancevar2 = enquos(...)
  
  bal.data = data %>% dplyr::select(!!group, !!balancevar,
                                    ...) %>%
    filter(!is.na(!!group)) %>% as.data.frame()
  
  balance.tab1 = bal.data %>%
    group_by(!!group) %>%
    summarise_at(vars(!!balancevar, ...), funs(mean), na.rm = T) %>%
    t() %>%
    as.data.frame() %>%
    mutate(varnm = rownames(.)) %>%
    rename(mean0 = V1, mean1 = V2) %>%
    bind_cols(bal.data %>%
                group_by(!!group) %>%
                summarise_at(vars(!!balancevar, ...),
                             funs(sd), na.rm = T) %>%
                t() %>% as.data.frame() %>%
                mutate(varnm = rownames(.)) %>%
                rename(sd0 = V1, sd1 = V2) %>%
                dplyr::select(-varnm)) %>%
    dplyr::select(varnm, mean0, sd0, mean1, sd1) %>%
    mutate(n0 = bal.data %>% filter(!!group==0) %>% nrow(),
           n1 = bal.data %>% filter(!!group==1) %>% nrow(),
           diff = mean1 - mean0)
  
  balance.tab = apply(bal.data[,-1] , MARGIN = 2,
                      FUN = function(x) t.test(x ~ bal.data[,1], data = bal.data)$p.value) %>%
    as.matrix() %>% round(3) %>%
    as.data.frame() %>% mutate(varnm = rownames(.)) %>%
    rename(p.val = V1) %>%
    left_join(apply(bal.data[,-1] , MARGIN = 2,
                    FUN = function(x) t.test(x ~ bal.data[,1], data = bal.data)$conf.int[[1]]) %>%
                as.matrix() %>% round(3) %>%
                as.data.frame() %>% mutate(varnm = rownames(.)) %>%
                rename(ci.lo = V1)) %>%
    left_join(apply(bal.data[,-1] , MARGIN = 2,
                    FUN = function(x) t.test(x ~ bal.data[,1], data = bal.data)$conf.int[[2]]) %>%
                as.matrix() %>% round(3) %>%
                as.data.frame() %>% mutate(varnm = rownames(.)) %>%
                rename(ci.hi = V1)) %>%
    left_join(balance.tab1)
  
  
  balance.plot = ggplot(data = balance.tab)+
    geom_pointrange(aes(x = as.factor(varnm), y = mean0-mean1,
                        ymin = ci.lo, ymax = ci.hi, 
                        color = p.val<0.05),size = 1)+
    geom_hline(yintercept = 0, linetype = "dashed")+
    scale_color_manual(values = c("gray31", "red"))+
    coord_flip()+
    labs(y = "Difference", x = "Variable")
  
  
  
  return(list(balance.tab, balance.plot))
  
}

bal_plot = function(data, labs){
  
  balance.plot = ggplot(data = data)+
    geom_pointrange(aes(x = as.factor(varnm), y = mean0-mean1,
                        ymin = ci.lo, ymax = ci.hi,
                        color = p.val<0.05), size = 1)+
    geom_hline(yintercept = 0, linetype = "dashed")+
    scale_x_discrete(labels = labs)+
    scale_color_manual(values = c("gray31", "red"))+
    coord_flip()+
    labs(y = "Difference (Mean 1 - Mean 0)", x = "Variable")+
    facet_grid(.~sample)
  return(balance.plot)
  
}



## Individual plots
effect_size_plot = function(data, outcome, sample, model, vstvar, vwvar, coef_dif_p_w, coef_dif_p_st){
  vstvar = ifelse(is.null(vstvar), "vstreserv", vstvar)
  vwvar = ifelse(is.null(vwvar), "vwreserv", vwvar)
  vw_vst_var = paste0(vwvar,":",vstvar)
  # Calculate effect sizes
  sample = paste0(sample, ", Outcome: ", outcome)
  women_effect = tidy(model)$estimate[tidy(model)$term==vwvar]
  st_effect = tidy(model)$estimate[tidy(model)$term==vstvar]
  form_st = paste(vstvar, " + ", vw_vst_var, " = 0")
  combined_st_lht = glht(model, linfct = form_st)
  combined_st_effect = tidy(combined_st_lht)$estimate
  
  form_w = paste(vwvar, " + ", vw_vst_var, " = 0")
  combined_w_lht = glht(model, linfct = form_w)
  combined_w_effect = tidy(combined_w_lht)$estimate
  
  
  # Add standard errors
  women_se = tidy(model)$std.error[tidy(model)$term==vwvar]
  st_se = tidy(model)$std.error[tidy(model)$term==vstvar]
  combined_w_se = tidy(combined_w_lht)$std.error
  combined_st_se = tidy(combined_st_lht)$std.error
  
  # Add p-values
  women_p = tidy(model)$p.value[tidy(model)$term==vwvar]
  st_p = tidy(model)$p.value[tidy(model)$term==vstvar]
  combined_w_p = tidy(combined_w_lht)$adj.p.value
  combined_st_p = tidy(combined_st_lht)$adj.p.value
  
  
  
  figure_table = data.frame(
    estimate = c(women_effect, st_effect, combined_w_effect, combined_st_effect),
    std.error = c(women_se, st_se, combined_w_se, combined_st_se),
    p.value = c(women_p, st_p, combined_w_p, combined_st_p),
    labels = c(4, 3, 2, 1), 
    coef_dif_p = c(coef_dif_p_w, coef_dif_p_st,NA_real_, NA_real_)
  ) %>% mutate(lab_order = factor(labels, labels = c("Combined ST Effect", "Combined Women Effect", "ST Res.","Women's Res.")))
  

  return(figure_table)
}

# Table-maker
table_maker = function(model, se, title, out, dep.var.labels, lines, omit, covar.lab = NULL){
  stargazer(model, se = se, title = title,
            omit = omit,
            keep.stat = c("adj.rsq", "n"),
            star.char = c("+", "*", "**", "***"),
            star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
            out = out, dep.var.labels = dep.var.labels,
            add.lines = lines,
            covariate.labels = covar.lab)
}






dir.create(paste0("final_output"))
dir.create(paste0("final_output/tables"))
dir.create(paste0("final_output/figures"))
tab.out = paste0("final_output/tables/")
fig.out = paste0("final_output/figures/")

### LOAD DATA ###
reds = read.csv("data/reds.csv")
dn.dta = read.csv("data/dn_dta.csv")
dn.dta.vill = read.csv("data/dn_vill_dta.csv")


### RUN MATCHING ALGORITHM ###
source("code/_2_matching.R")

# Get matched data
reds.st.matched = read.csv("data/REDS_matched_vstreserv.csv")
reds.scst.matched = read.csv("data/REDS_matched_vscstreserv.csv")

### DESCRIPTIVE FIGURES AND TABLES ###   
source("code/_3_descriptives.R")

### INTER-CASTE INTERACTIONS ### 
source("code/_4_inter_caste_relations.R")

### MARRIAGE ###
source("code/_5_marriage.R")

### CONFLICT ####
reds_rand = reds %>% filter(nr_short!=1) # Data only from states with random assignment
source("code/_6_conflict.R")

### NET EFFECTS ###
source("code/_7_net_effect.R")

### ALTERNATIVE MECHANISMS ###
source("code/_8_alternative_mech.R")

### REDISTRIBUTION ###
source("code/_9_redistribution.R")

### ILLUSTRATION ###
source("code/_10_quota_illustration.R")

### GLOBAL QUOTAS ###
source("code/_11_global_quotas.R")

### CANDIDACY ###
source("code/_12_candidacy.R")


